{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "04aecfba-d254-4c28-b472-025932bc8a28",
   "metadata": {},
   "source": [
    "# Evaluation part I\n",
    "\n",
    "Evaluate LLM responses when there is a single \"right answer\"."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5f3ebd6b-8982-4b34-8c2f-90139749f122",
   "metadata": {},
   "source": [
    "## Setup\n",
    "#### Load the API key and relevant Python libaries.\n",
    "In this course, we've provided some code that loads the OpenAI API key for you."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "739371db",
   "metadata": {
    "height": 166
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import openai\n",
    "import sys\n",
    "sys.path.append('../..')\n",
    "import utils\n",
    "from dotenv import load_dotenv, find_dotenv\n",
    "_ = load_dotenv(find_dotenv()) # read local .env file\n",
    "\n",
    "openai.api_key  = os.environ['OPENAI_API_KEY']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "7b84b08a",
   "metadata": {
    "height": 149
   },
   "outputs": [],
   "source": [
    "def get_completion_from_messages(messages, model=\"gpt-3.5-turbo\", temperature=0, max_tokens=500):\n",
    "    response = openai.ChatCompletion.create(\n",
    "        model=model,\n",
    "        messages=messages,\n",
    "        temperature=temperature, \n",
    "        max_tokens=max_tokens, \n",
    "    )\n",
    "    return response.choices[0].message[\"content\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b90ab304-3357-4f00-bac6-061878868de2",
   "metadata": {},
   "source": [
    "#### Get the relevant products and categories\n",
    "Here is the list of products and categories that are in the product catalog."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "423f24ff",
   "metadata": {
    "height": 47
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Computers and Laptops': ['TechPro Ultrabook',\n",
       "  'BlueWave Gaming Laptop',\n",
       "  'PowerLite Convertible',\n",
       "  'TechPro Desktop',\n",
       "  'BlueWave Chromebook'],\n",
       " 'Smartphones and Accessories': ['SmartX ProPhone',\n",
       "  'MobiTech PowerCase',\n",
       "  'SmartX MiniPhone',\n",
       "  'MobiTech Wireless Charger',\n",
       "  'SmartX EarBuds'],\n",
       " 'Televisions and Home Theater Systems': ['CineView 4K TV',\n",
       "  'SoundMax Home Theater',\n",
       "  'CineView 8K TV',\n",
       "  'SoundMax Soundbar',\n",
       "  'CineView OLED TV'],\n",
       " 'Gaming Consoles and Accessories': ['GameSphere X',\n",
       "  'ProGamer Controller',\n",
       "  'GameSphere Y',\n",
       "  'ProGamer Racing Wheel',\n",
       "  'GameSphere VR Headset'],\n",
       " 'Audio Equipment': ['AudioPhonic Noise-Canceling Headphones',\n",
       "  'WaveSound Bluetooth Speaker',\n",
       "  'AudioPhonic True Wireless Earbuds',\n",
       "  'WaveSound Soundbar',\n",
       "  'AudioPhonic Turntable'],\n",
       " 'Cameras and Camcorders': ['FotoSnap DSLR Camera',\n",
       "  'ActionCam 4K',\n",
       "  'FotoSnap Mirrorless Camera',\n",
       "  'ZoomMaster Camcorder',\n",
       "  'FotoSnap Instant Camera']}"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "products_and_category = utils.get_products_and_category()\n",
    "products_and_category"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "414d60f4",
   "metadata": {
    "height": 30
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "6"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(products_and_category)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7f1d1cb4-72f0-4a1a-9dd2-c5a7305ce249",
   "metadata": {},
   "source": [
    "### Find relevant product and category names (version 1)\n",
    "This could be the version that is running in production."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "7aad328a",
   "metadata": {
    "height": 776
   },
   "outputs": [],
   "source": [
    "def find_category_and_product_v1(user_input,products_and_category):\n",
    "\n",
    "    delimiter = \"####\"\n",
    "    system_message = f\"\"\"\n",
    "    You will be provided with customer service queries. \\\n",
    "    The customer service query will be delimited with {delimiter} characters.\n",
    "    Output a python list of json objects, where each object has the following format:\n",
    "        'category': <one of Computers and Laptops, Smartphones and Accessories, Televisions and Home Theater Systems, \\\n",
    "    Gaming Consoles and Accessories, Audio Equipment, Cameras and Camcorders>,\n",
    "    AND\n",
    "        'products': <a list of products that must be found in the allowed products below>\n",
    "\n",
    "\n",
    "    Where the categories and products must be found in the customer service query.\n",
    "    If a product is mentioned, it must be associated with the correct category in the allowed products list below.\n",
    "    If no products or categories are found, output an empty list.\n",
    "    \n",
    "\n",
    "    List out all products that are relevant to the customer service query based on how closely it relates\n",
    "    to the product name and product category.\n",
    "    Do not assume, from the name of the product, any features or attributes such as relative quality or price.\n",
    "\n",
    "    The allowed products are provided in JSON format.\n",
    "    The keys of each item represent the category.\n",
    "    The values of each item is a list of products that are within that category.\n",
    "    Allowed products: {products_and_category}\n",
    "    \n",
    "\n",
    "    \"\"\"\n",
    "    \n",
    "    few_shot_user_1 = \"\"\"I want the most expensive computer.\"\"\"\n",
    "    few_shot_assistant_1 = \"\"\" \n",
    "    [{'category': 'Computers and Laptops', \\\n",
    "'products': ['TechPro Ultrabook', 'BlueWave Gaming Laptop', 'PowerLite Convertible', 'TechPro Desktop', 'BlueWave Chromebook']}]\n",
    "    \"\"\"\n",
    "    \n",
    "    messages =  [  \n",
    "    {'role':'system', 'content': system_message},    \n",
    "    {'role':'user', 'content': f\"{delimiter}{few_shot_user_1}{delimiter}\"},  \n",
    "    {'role':'assistant', 'content': few_shot_assistant_1 },\n",
    "    {'role':'user', 'content': f\"{delimiter}{user_input}{delimiter}\"},  \n",
    "    ] \n",
    "    return get_completion_from_messages(messages)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0f13cb2b-e36e-4166-8332-826288e92c61",
   "metadata": {},
   "source": [
    "### Evaluate on some queries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "cce5b29f",
   "metadata": {
    "height": 98
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "    [{'category': 'Televisions and Home Theater Systems', 'products': ['CineView 4K TV', 'SoundMax Home Theater', 'CineView 8K TV', 'SoundMax Soundbar', 'CineView OLED TV']}]\n"
     ]
    }
   ],
   "source": [
    "customer_msg_0 = f\"\"\"Which TV can I buy if I'm on a budget?\"\"\"\n",
    "\n",
    "products_by_category_0 = find_category_and_product_v1(customer_msg_0,\n",
    "                                                      products_and_category)\n",
    "print(products_by_category_0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "8ad30ad4",
   "metadata": {
    "height": 98
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "    [{'category': 'Smartphones and Accessories', 'products': ['MobiTech Wireless Charger']}]\n",
      "    \n"
     ]
    }
   ],
   "source": [
    "customer_msg_1 = f\"\"\"I need a charger for my smartphone\"\"\"\n",
    "\n",
    "products_by_category_1 = find_category_and_product_v1(customer_msg_1,\n",
    "                                                      products_and_category)\n",
    "print(products_by_category_1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "eeed8094",
   "metadata": {
    "height": 115
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"\\n    [{'category': 'Computers and Laptops', 'products': ['TechPro Ultrabook', 'BlueWave Gaming Laptop', 'PowerLite Convertible', 'TechPro Desktop', 'BlueWave Chromebook']}]\""
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "customer_msg_2 = f\"\"\"\n",
    "What computers do you have?\"\"\"\n",
    "\n",
    "products_by_category_2 = find_category_and_product_v1(customer_msg_2,\n",
    "                                                      products_and_category)\n",
    "products_by_category_2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "01e48b0f",
   "metadata": {
    "height": 132
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "    [{'category': 'Smartphones and Accessories', 'products': ['SmartX ProPhone']}, {'category': 'Cameras and Camcorders', 'products': ['FotoSnap DSLR Camera']}]\n",
      "    \n"
     ]
    }
   ],
   "source": [
    "customer_msg_3 = f\"\"\"\n",
    "tell me about the smartx pro phone and the fotosnap camera, the dslr one.\n",
    "Also, what TVs do you have?\"\"\"\n",
    "\n",
    "products_by_category_3 = find_category_and_product_v1(customer_msg_3,\n",
    "                                                      products_and_category)\n",
    "print(products_by_category_3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "8a95e435",
   "metadata": {
    "height": 132
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "    [{'category': 'Smartphones and Accessories', 'products': ['SmartX ProPhone']}, {'category': 'Cameras and Camcorders', 'products': ['FotoSnap DSLR Camera']}]\n",
      "    \n"
     ]
    }
   ],
   "source": [
    "customer_msg_3_2 = f\"\"\"\n",
    "tell me about the smartx pro phone and the fotosnap camera, the dslr one.\\\n",
    "Also, what TVs do you have?\"\"\"\n",
    "\n",
    "products_by_category_3_2 = find_category_and_product_v1(customer_msg_3,\n",
    "                                                      products_and_category)\n",
    "print(products_by_category_3_2)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8787b8a7",
   "metadata": {
    "height": 30
   },
   "source": [
    "### 上面这个例子没有回答出tv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "12513cb1",
   "metadata": {
    "height": 1014
   },
   "outputs": [],
   "source": [
    "# 针对上面那个案例没有回答出TVs\n",
    "def find_category_and_product_v3(user_input,products_and_category):\n",
    "\n",
    "    delimiter = \"####\"\n",
    "    system_message = f\"\"\"\n",
    "    You will be provided with customer service queries. \\\n",
    "    The customer service query will be delimited with {delimiter} characters.\n",
    "    Output a python list of json objects, where each object has the following format:\n",
    "        'category': <one of Computers and Laptops, Smartphones and Accessories, Televisions and Home Theater Systems, \\\n",
    "    Gaming Consoles and Accessories, Audio Equipment, Cameras and Camcorders>,\n",
    "    AND\n",
    "        'products': <a list of products that must be found in the allowed products below>\n",
    "\n",
    "\n",
    "    Where the categories and products must be found in the customer service query.\n",
    "    If a product is mentioned, it must be associated with the correct category in the allowed products list below.\n",
    "    If no products or categories are found, output an empty list.\n",
    "    \n",
    "\n",
    "    List out all products that are relevant to the customer service query based on how closely it relates\n",
    "    to the product name and product category.\n",
    "    Do not assume, from the name of the product, any features or attributes such as relative quality or price.\n",
    "\n",
    "    The allowed products are provided in JSON format.\n",
    "    The keys of each item represent the category.\n",
    "    The values of each item is a list of products that are within that category.\n",
    "    Allowed products: {products_and_category}\n",
    "    \n",
    "\n",
    "    \"\"\"\n",
    "    \n",
    "    few_shot_user_1 = \"\"\"I want the most expensive computer.\"\"\"\n",
    "    few_shot_assistant_1 = \"\"\" \n",
    "    [{'category': 'Computers and Laptops', \\\n",
    "'products': ['TechPro Ultrabook', 'BlueWave Gaming Laptop', 'PowerLite Convertible', 'TechPro Desktop', 'BlueWave Chromebook']}]\n",
    "    \"\"\"\n",
    "    few_shot_user_2 = f\"\"\"\n",
    "    tell me about the smartx pro phone and the fotosnap camera, the dslr one.\n",
    "    Also, what audio equipments do you have?\"\"\"\n",
    "    few_shot_assistant_2 = \"\"\"\n",
    "    [{'category': 'Smartphones and Accessories', \\\n",
    "    'products': ['SmartX ProPhone']}, \\\n",
    "    {'category': 'Cameras and Camcorders', \\\n",
    "    'products': ['FotoSnap DSLR Camera']}, \\\n",
    "    {'category': 'Audio Equipment', \\\n",
    "    'products': ['AudioPhonic Noise-Canceling Headphones','WaveSound Bluetooth Speaker',\\\n",
    "    'AudioPhonic True Wireless Earbuds','WaveSound Soundbar','AudioPhonic Turntable']}\\\n",
    "    ]\n",
    "    \"\"\"\n",
    "    messages =  [  \n",
    "    {'role':'system', 'content': system_message},    \n",
    "    {'role':'user', 'content': f\"{delimiter}{few_shot_user_1}{delimiter}\"},  \n",
    "    {'role':'assistant', 'content': few_shot_assistant_1 },\n",
    "    {'role':'user', 'content': f\"{delimiter}{few_shot_user_2}{delimiter}\"},  \n",
    "    {'role':'assistant', 'content': few_shot_assistant_2 },\n",
    "    {'role':'user', 'content': f\"{delimiter}{user_input}{delimiter}\"},  \n",
    "    ] \n",
    "    return get_completion_from_messages(messages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5addd181",
   "metadata": {
    "height": 132
   },
   "outputs": [],
   "source": [
    "customer_msg_3_2 = f\"\"\"\n",
    "tell me about the smartx pro phone and the fotosnap camera, the dslr one.\\\n",
    "Also, what TVs do you have?\"\"\"\n",
    "\n",
    "products_by_category_3_2 = find_category_and_product_v3(customer_msg_3_2,\n",
    "                                                      products_and_category)\n",
    "print(products_by_category_3_2)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4b09d273-b88a-4d1c-a5b8-f1e5066b4a2f",
   "metadata": {},
   "source": [
    "### Harder test cases\n",
    "Identify queries found in production, where the model is not working as expected."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "9b5bb99e",
   "metadata": {
    "height": 132
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "    [{'category': 'Televisions and Home Theater Systems', 'products': ['CineView 8K TV']},\n",
      "     {'category': 'Gaming Consoles and Accessories', 'products': ['GameSphere X']},\n",
      "     {'category': 'Computers and Laptops', 'products': ['TechPro Ultrabook', 'BlueWave Gaming Laptop', 'PowerLite Convertible', 'TechPro Desktop', 'BlueWave Chromebook']}]\n"
     ]
    }
   ],
   "source": [
    "customer_msg_4 = f\"\"\"\n",
    "tell me about the CineView TV, the 8K one, Gamesphere console, the X one.\n",
    "I'm on a budget, what computers do you have?\"\"\"\n",
    "\n",
    "products_by_category_4 = find_category_and_product_v1(customer_msg_4,\n",
    "                                                      products_and_category)\n",
    "print(products_by_category_4)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "02027e5d",
   "metadata": {
    "height": 30
   },
   "source": [
    "### 上面模型可能又进化了，没有了底部多余的信息，下面是针对底部有多余信息重新修改prompt"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a7d12681-997d-43a5-8732-8f3aa9fc8cb3",
   "metadata": {},
   "source": [
    "### Modify the prompt to work on the hard test cases"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "609ce420",
   "metadata": {
    "height": 1048
   },
   "outputs": [],
   "source": [
    "# 针对底部多余信息的解决办法\n",
    "def find_category_and_product_v2(user_input,products_and_category):\n",
    "    \"\"\"\n",
    "    Added: Do not output any additional text that is not in JSON format.\n",
    "    Added a second example (for few-shot prompting) where user asks for \n",
    "    the cheapest computer. In both few-shot examples, the shown response \n",
    "    is the full list of products in JSON only.\n",
    "    \"\"\"\n",
    "    delimiter = \"####\"\n",
    "    system_message = f\"\"\"\n",
    "    You will be provided with customer service queries. \\\n",
    "    The customer service query will be delimited with {delimiter} characters.\n",
    "    Output a python list of json objects, where each object has the following format:\n",
    "        'category': <one of Computers and Laptops, Smartphones and Accessories, Televisions and Home Theater Systems, \\\n",
    "    Gaming Consoles and Accessories, Audio Equipment, Cameras and Camcorders>,\n",
    "    AND\n",
    "        'products': <a list of products that must be found in the allowed products below>\n",
    "    Do not output any additional text that is not in JSON format.\n",
    "    Do not write any explanatory text after outputting the requested JSON.\n",
    "\n",
    "\n",
    "    Where the categories and products must be found in the customer service query.\n",
    "    If a product is mentioned, it must be associated with the correct category in the allowed products list below.\n",
    "    If no products or categories are found, output an empty list.\n",
    "    \n",
    "\n",
    "    List out all products that are relevant to the customer service query based on how closely it relates\n",
    "    to the product name and product category.\n",
    "    Do not assume, from the name of the product, any features or attributes such as relative quality or price.\n",
    "\n",
    "    The allowed products are provided in JSON format.\n",
    "    The keys of each item represent the category.\n",
    "    The values of each item is a list of products that are within that category.\n",
    "    Allowed products: {products_and_category}\n",
    "    \n",
    "\n",
    "    \"\"\"\n",
    "    \n",
    "    few_shot_user_1 = \"\"\"I want the most expensive computer. What do you recommend?\"\"\"\n",
    "    few_shot_assistant_1 = \"\"\" \n",
    "    [{'category': 'Computers and Laptops', \\\n",
    "'products': ['TechPro Ultrabook', 'BlueWave Gaming Laptop', 'PowerLite Convertible', 'TechPro Desktop', 'BlueWave Chromebook']}]\n",
    "    \"\"\"\n",
    "    \n",
    "    few_shot_user_2 = \"\"\"I want the most cheapest computer. What do you recommend?\"\"\"\n",
    "    few_shot_assistant_2 = \"\"\" \n",
    "    [{'category': 'Computers and Laptops', \\\n",
    "'products': ['TechPro Ultrabook', 'BlueWave Gaming Laptop', 'PowerLite Convertible', 'TechPro Desktop', 'BlueWave Chromebook']}]\n",
    "    \"\"\"\n",
    "    \n",
    "    messages =  [  \n",
    "    {'role':'system', 'content': system_message},    \n",
    "    {'role':'user', 'content': f\"{delimiter}{few_shot_user_1}{delimiter}\"},  \n",
    "    {'role':'assistant', 'content': few_shot_assistant_1 },\n",
    "    {'role':'user', 'content': f\"{delimiter}{few_shot_user_2}{delimiter}\"},  \n",
    "    {'role':'assistant', 'content': few_shot_assistant_2 },\n",
    "    {'role':'user', 'content': f\"{delimiter}{user_input}{delimiter}\"},  \n",
    "    ] \n",
    "    return get_completion_from_messages(messages)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "32c833b2-0494-4e7b-bfbc-38f67889fb15",
   "metadata": {},
   "source": [
    "### Evaluate the modified prompt on the hard tests cases"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "6ae1f7ef",
   "metadata": {
    "height": 149
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "    [{'category': 'Smartphones and Accessories', 'products': ['SmartX ProPhone']}, {'category': 'Cameras and Camcorders', 'products': ['FotoSnap DSLR Camera']}]\n",
      "    \n"
     ]
    }
   ],
   "source": [
    "#### 新的问题还没解决，TVs没有回答出来\n",
    "customer_msg_3 = f\"\"\"\n",
    "tell me about the smartx pro phone and the fotosnap camera, the dslr one.\n",
    "Also, what TVs do you have?\"\"\"\n",
    "\n",
    "products_by_category_3 = find_category_and_product_v2(customer_msg_3,\n",
    "                                                      products_and_category)\n",
    "print(products_by_category_3)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6175e6a4-983c-44f7-8310-95a24bdf0c88",
   "metadata": {},
   "source": [
    "### Regression testing: verify that the model still works on previous test cases\n",
    "Check that modifying the model to fix the hard test cases does not negatively affect its performance on previous test cases."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "e65041cd",
   "metadata": {
    "height": 98
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " \n",
      "    [{'category': 'Televisions and Home Theater Systems', 'products': ['CineView 4K TV', 'SoundMax Home Theater', 'CineView 8K TV', 'SoundMax Soundbar', 'CineView OLED TV']}]\n",
      "    \n"
     ]
    }
   ],
   "source": [
    "customer_msg_0 = f\"\"\"Which TV can I buy if I'm on a budget?\"\"\"\n",
    "\n",
    "products_by_category_0 = find_category_and_product_v2(customer_msg_0,\n",
    "                                                      products_and_category)\n",
    "print(products_by_category_0)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bf40ac24-fd1e-4d5d-b41f-760b3e0d4d68",
   "metadata": {},
   "source": [
    "### Gather development set for automated testing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "36e257c2",
   "metadata": {
    "height": 1643
   },
   "outputs": [],
   "source": [
    "msg_ideal_pairs_set = [\n",
    "    \n",
    "    # eg 0\n",
    "    {'customer_msg':\"\"\"Which TV can I buy if I'm on a budget?\"\"\",\n",
    "     'ideal_answer':{\n",
    "        'Televisions and Home Theater Systems':set(\n",
    "            ['CineView 4K TV', 'SoundMax Home Theater', 'CineView 8K TV', 'SoundMax Soundbar', 'CineView OLED TV']\n",
    "        )}\n",
    "    },\n",
    "\n",
    "    # eg 1\n",
    "    {'customer_msg':\"\"\"I need a charger for my smartphone\"\"\",\n",
    "     'ideal_answer':{\n",
    "        'Smartphones and Accessories':set(\n",
    "            ['MobiTech PowerCase', 'MobiTech Wireless Charger', 'SmartX EarBuds']\n",
    "        )}\n",
    "    },\n",
    "    # eg 2\n",
    "    {'customer_msg':f\"\"\"What computers do you have?\"\"\",\n",
    "     'ideal_answer':{\n",
    "           'Computers and Laptops':set(\n",
    "               ['TechPro Ultrabook', 'BlueWave Gaming Laptop', 'PowerLite Convertible', 'TechPro Desktop', 'BlueWave Chromebook'\n",
    "               ])\n",
    "                }\n",
    "    },\n",
    "\n",
    "    # eg 3\n",
    "    {'customer_msg':f\"\"\"tell me about the smartx pro phone and \\\n",
    "    the fotosnap camera, the dslr one.\\\n",
    "    Also, what TVs do you have?\"\"\",\n",
    "     'ideal_answer':{\n",
    "        'Smartphones and Accessories':set(\n",
    "            ['SmartX ProPhone']),\n",
    "        'Cameras and Camcorders':set(\n",
    "            ['FotoSnap DSLR Camera']),\n",
    "        'Televisions and Home Theater Systems':set(\n",
    "            ['CineView 4K TV', 'SoundMax Home Theater','CineView 8K TV', 'SoundMax Soundbar', 'CineView OLED TV'])\n",
    "        }\n",
    "    }, \n",
    "    \n",
    "    # eg 4\n",
    "    {'customer_msg':\"\"\"tell me about the CineView TV, the 8K one, Gamesphere console, the X one.\n",
    "I'm on a budget, what computers do you have?\"\"\",\n",
    "     'ideal_answer':{\n",
    "        'Televisions and Home Theater Systems':set(\n",
    "            ['CineView 8K TV']),\n",
    "        'Gaming Consoles and Accessories':set(\n",
    "            ['GameSphere X']),\n",
    "        'Computers and Laptops':set(\n",
    "            ['TechPro Ultrabook', 'BlueWave Gaming Laptop', 'PowerLite Convertible', 'TechPro Desktop', 'BlueWave Chromebook'])\n",
    "        }\n",
    "    },\n",
    "    \n",
    "    # eg 5\n",
    "    {'customer_msg':f\"\"\"What smartphones do you have?\"\"\",\n",
    "     'ideal_answer':{\n",
    "           'Smartphones and Accessories':set(\n",
    "               ['SmartX ProPhone', 'MobiTech PowerCase', 'SmartX MiniPhone', 'MobiTech Wireless Charger', 'SmartX EarBuds'\n",
    "               ])\n",
    "                    }\n",
    "    },\n",
    "    # eg 6\n",
    "    {'customer_msg':f\"\"\"I'm on a budget.  Can you recommend some smartphones to me?\"\"\",\n",
    "     'ideal_answer':{\n",
    "        'Smartphones and Accessories':set(\n",
    "            ['SmartX EarBuds', 'SmartX MiniPhone', 'MobiTech PowerCase', 'SmartX ProPhone', 'MobiTech Wireless Charger']\n",
    "        )}\n",
    "    },\n",
    "\n",
    "    # eg 7 # this will output a subset of the ideal answer\n",
    "    {'customer_msg':f\"\"\"What Gaming consoles would be good for my friend who is into racing games?\"\"\",\n",
    "     'ideal_answer':{\n",
    "        'Gaming Consoles and Accessories':set([\n",
    "            'GameSphere X',\n",
    "            'ProGamer Controller',\n",
    "            'GameSphere Y',\n",
    "            'ProGamer Racing Wheel',\n",
    "            'GameSphere VR Headset'\n",
    "     ])}\n",
    "    },\n",
    "    # eg 8\n",
    "    {'customer_msg':f\"\"\"What could be a good present for my videographer friend?\"\"\",\n",
    "     'ideal_answer': {\n",
    "        'Cameras and Camcorders':set([\n",
    "        'FotoSnap DSLR Camera', 'ActionCam 4K', 'FotoSnap Mirrorless Camera', 'ZoomMaster Camcorder', 'FotoSnap Instant Camera'\n",
    "        ])}\n",
    "    },\n",
    "    \n",
    "    # eg 9\n",
    "    {'customer_msg':f\"\"\"I would like a hot tub time machine.\"\"\",\n",
    "     'ideal_answer': []\n",
    "    }\n",
    "    \n",
    "]\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8aaccb7a-3cee-4189-9660-110427a4bb83",
   "metadata": {},
   "source": [
    "### Evaluate test cases by comparing to the ideal answers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "66a7df29",
   "metadata": {
    "height": 1169
   },
   "outputs": [],
   "source": [
    "import json\n",
    "def eval_response_with_ideal(response,\n",
    "                              ideal,\n",
    "                              debug=False):\n",
    "    \n",
    "    if debug:\n",
    "        print(\"response\")\n",
    "        print(response)\n",
    "    \n",
    "    # json.loads() expects double quotes, not single quotes\n",
    "    json_like_str = response.replace(\"'\",'\"')\n",
    "    \n",
    "    # parse into a list of dictionaries\n",
    "    l_of_d = json.loads(json_like_str)\n",
    "    \n",
    "    # special case when response is empty list\n",
    "    if l_of_d == [] and ideal == []:\n",
    "        return 1\n",
    "    \n",
    "    # otherwise, response is empty \n",
    "    # or ideal should be empty, there's a mismatch\n",
    "    elif l_of_d == [] or ideal == []:\n",
    "        return 0\n",
    "    \n",
    "    correct = 0    \n",
    "    \n",
    "    if debug:\n",
    "        print(\"l_of_d is\")\n",
    "        print(l_of_d)\n",
    "    for d in l_of_d:\n",
    "\n",
    "        cat = d.get('category')\n",
    "        prod_l = d.get('products')\n",
    "        if cat and prod_l:\n",
    "            # convert list to set for comparison\n",
    "            prod_set = set(prod_l)\n",
    "            # get ideal set of products\n",
    "            ideal_cat = ideal.get(cat)\n",
    "            if ideal_cat:\n",
    "                prod_set_ideal = set(ideal.get(cat))\n",
    "            else:\n",
    "                if debug:\n",
    "                    print(f\"did not find category {cat} in ideal\")\n",
    "                    print(f\"ideal: {ideal}\")\n",
    "                continue\n",
    "                \n",
    "            if debug:\n",
    "                print(\"prod_set\\n\",prod_set)\n",
    "                print()\n",
    "                print(\"prod_set_ideal\\n\",prod_set_ideal)\n",
    "\n",
    "            if prod_set == prod_set_ideal:\n",
    "                if debug:\n",
    "                    print(\"correct\")\n",
    "                correct +=1\n",
    "            else:\n",
    "                print(\"incorrect\")\n",
    "                print(f\"prod_set: {prod_set}\")\n",
    "                print(f\"prod_set_ideal: {prod_set_ideal}\")\n",
    "                if prod_set <= prod_set_ideal:\n",
    "                    print(\"response is a subset of the ideal answer\")\n",
    "                elif prod_set >= prod_set_ideal:\n",
    "                    print(\"response is a superset of the ideal answer\")\n",
    "\n",
    "    # count correct over total number of items in list\n",
    "    pc_correct = correct / len(l_of_d)\n",
    "        \n",
    "    return pc_correct"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "e7337ba6",
   "metadata": {
    "height": 64
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Customer message: What Gaming consoles would be good for my friend who is into racing games?\n",
      "Ideal answer: {'Gaming Consoles and Accessories': {'ProGamer Controller', 'GameSphere VR Headset', 'GameSphere X', 'GameSphere Y', 'ProGamer Racing Wheel'}}\n"
     ]
    }
   ],
   "source": [
    "print(f'Customer message: {msg_ideal_pairs_set[7][\"customer_msg\"]}')\n",
    "print(f'Ideal answer: {msg_ideal_pairs_set[7][\"ideal_answer\"]}')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "f109f542",
   "metadata": {
    "height": 115
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Resonse:  \n",
      "    [{'category': 'Gaming Consoles and Accessories', 'products': ['GameSphere X', 'ProGamer Controller', 'GameSphere Y', 'ProGamer Racing Wheel', 'GameSphere VR Headset']}]\n",
      "    \n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "1.0"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "response = find_category_and_product_v2(msg_ideal_pairs_set[7][\"customer_msg\"],\n",
    "                                         products_and_category)\n",
    "print(f'Resonse: {response}')\n",
    "\n",
    "eval_response_with_ideal(response,\n",
    "                              msg_ideal_pairs_set[7][\"ideal_answer\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "38ebaf7b-ee94-4b8c-b191-bf23864aed56",
   "metadata": {},
   "source": [
    "### Run evaluation on all test cases and calculate the fraction of cases that are correct"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "bb75bebc",
   "metadata": {
    "height": 404
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "example 0\n",
      "0: 1.0\n",
      "example 1\n",
      "incorrect\n",
      "prod_set: {'SmartX MiniPhone', 'MobiTech PowerCase', 'SmartX EarBuds', 'MobiTech Wireless Charger', 'SmartX ProPhone'}\n",
      "prod_set_ideal: {'MobiTech Wireless Charger', 'MobiTech PowerCase', 'SmartX EarBuds'}\n",
      "response is a superset of the ideal answer\n",
      "1: 0.0\n",
      "example 2\n",
      "2: 1.0\n",
      "example 3\n",
      "3: 1.0\n",
      "example 4\n",
      "4: 1.0\n",
      "example 5\n",
      "5: 1.0\n",
      "example 6\n",
      "6: 1.0\n",
      "example 7\n",
      "7: 1.0\n",
      "example 8\n",
      "8: 1.0\n",
      "example 9\n",
      "9: 1\n",
      "Fraction correct out of 10: 0.9\n"
     ]
    }
   ],
   "source": [
    "# Note, this will not work if any of the api calls time out\n",
    "score_accum = 0\n",
    "for i, pair in enumerate(msg_ideal_pairs_set):\n",
    "    print(f\"example {i}\")\n",
    "    \n",
    "    customer_msg = pair['customer_msg']\n",
    "    ideal = pair['ideal_answer']\n",
    "    \n",
    "    # print(\"Customer message\",customer_msg)\n",
    "    # print(\"ideal:\",ideal)\n",
    "    response = find_category_and_product_v2(customer_msg,\n",
    "                                                      products_and_category)\n",
    "\n",
    "    \n",
    "    # print(\"products_by_category\",products_by_category)\n",
    "    score = eval_response_with_ideal(response,ideal,debug=False)\n",
    "    print(f\"{i}: {score}\")\n",
    "    score_accum += score\n",
    "    \n",
    "\n",
    "n_examples = len(msg_ideal_pairs_set)\n",
    "fraction_correct = score_accum / n_examples\n",
    "print(f\"Fraction correct out of {n_examples}: {fraction_correct}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ed51e0f4",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.19"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
